{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用两个value模型估计action的价值,取其中小的计算结果,缓解自举"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdg0lEQVR4nO3dfXBU9b0/8Pdu9iGPu0mA7BJJSm5BMRdCNZGw0o7TkhIxRRRolXI1RaqjLgyIw9RYwNFxJvxwplarYuf2CvRajRevEaUgTYOGMq6AkdTwYLS9YHJJdsOD2U0C2Wx2P78/MOe6EDCbZPPNkvdr5syw5/v5Jp8T2DfnIeesTkQERETDTK+6ASIanRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkhLLwefHFFzFx4kTEx8ejsLAQBw4cUNUKESmgJHzeeOMNrF69Gk888QQ++eQTTJ8+HcXFxWhtbVXRDhEpoFNxY2lhYSFuuukmvPDCCwCAUCiErKwsrFixAo899ti3zg+FQmhubkZKSgp0Ol202yWifhIRtLe3IzMzE3r9lfdtDMPUk6a7uxu1tbUoKyvT1un1ehQVFcHlcvU5x+/3w+/3a69PnjyJ3NzcqPdKRAPT1NSECRMmXLFm2MPn9OnTCAaDsNlsYettNhs+++yzPueUl5fjySefvGR9U1MTLBZLVPokosj5fD5kZWUhJSXlW2uHPXwGoqysDKtXr9Ze926gxWJh+BCNQP05HTLs4TN27FjExcXB4/GErfd4PLDb7X3OMZvNMJvNw9EeEQ2TYb/aZTKZkJ+fj+rqam1dKBRCdXU1HA7HcLdDRIooOexavXo1SktLUVBQgBkzZuC3v/0tOjs7sXTpUhXtEJECSsLnrrvuwqlTp7B+/Xq43W5873vfw3vvvXfJSWgiunop+T2fwfL5fLBarfB6vTzhTDSCRPLe5L1dRKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREhGHz969ezFv3jxkZmZCp9Ph7bffDhsXEaxfvx7jx49HQkICioqK8MUXX4TVnD17FkuWLIHFYkFqaiqWLVuGjo6OQW0IEcWWiMOns7MT06dPx4svvtjn+MaNG/H888/j5Zdfxv79+5GUlITi4mJ0dXVpNUuWLMGRI0dQVVWFHTt2YO/evXjggQcGvhVEFHtkEABIZWWl9joUCondbpdnnnlGW9fW1iZms1lef/11ERE5evSoAJCDBw9qNbt27RKdTicnT57s1/f1er0CQLxe72DaJ6IhFsl7c0jP+Rw/fhxutxtFRUXaOqvVisLCQrhcLgCAy+VCamoqCgoKtJqioiLo9Xrs37+/z6/r9/vh8/nCFiKKbUMaPm63GwBgs9nC1ttsNm3M7XYjIyMjbNxgMCA9PV2ruVh5eTmsVqu2ZGVlDWXbRKRATFztKisrg9fr1ZampibVLRHRIA1p+NjtdgCAx+MJW+/xeLQxu92O1tbWsPGenh6cPXtWq7mY2WyGxWIJW4gotg1p+OTk5MBut6O6ulpb5/P5sH//fjgcDgCAw+FAW1sbamtrtZo9e/YgFAqhsLBwKNshohHMEOmEjo4O/OMf/9BeHz9+HHV1dUhPT0d2djZWrVqFp59+GpMnT0ZOTg7WrVuHzMxM3HHHHQCA66+/Hrfeeivuv/9+vPzyywgEAli+fDnuvvtuZGZmDtmGEdEIF+mltPfff18AXLKUlpaKyIXL7evWrRObzSZms1lmz54tDQ0NYV/jzJkzsnjxYklOThaLxSJLly6V9vb2fvfAS+1EI1Mk702diIjC7BsQn88Hq9UKr9fL8z9EI0gk782YuNpFRFcfhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJSIKn/Lyctx0001ISUlBRkYG7rjjDjQ0NITVdHV1wel0YsyYMUhOTsbChQvh8XjCahobG1FSUoLExERkZGRgzZo16OnpGfzWEFHMiCh8ampq4HQ68dFHH6GqqgqBQABz5sxBZ2enVvPII4/g3XffxbZt21BTU4Pm5mYsWLBAGw8GgygpKUF3dzc+/PBDbN26FVu2bMH69euHbquIaOSTQWhtbRUAUlNTIyIibW1tYjQaZdu2bVrNsWPHBIC4XC4REdm5c6fo9Xpxu91azaZNm8RisYjf7+/X9/V6vQJAvF7vYNonoiEWyXtzUOd8vF4vACA9PR0AUFtbi0AggKKiIq1mypQpyM7OhsvlAgC4XC5MmzYNNptNqykuLobP58ORI0f6/D5+vx8+ny9sIaLYNuDwCYVCWLVqFWbNmoWpU6cCANxuN0wmE1JTU8NqbTYb3G63VvPN4Okd7x3rS3l5OaxWq7ZkZWUNtG0iGiEGHD5OpxOHDx9GRUXFUPbTp7KyMni9Xm1pamqK+vckougyDGTS8uXLsWPHDuzduxcTJkzQ1tvtdnR3d6OtrS1s78fj8cBut2s1Bw4cCPt6vVfDemsuZjabYTabB9IqEY1QEe35iAiWL1+OyspK7NmzBzk5OWHj+fn5MBqNqK6u1tY1NDSgsbERDocDAOBwOFBfX4/W1latpqqqChaLBbm5uYPZFiKKIRHt+TidTrz22mvYvn07UlJStHM0VqsVCQkJsFqtWLZsGVavXo309HRYLBasWLECDocDM2fOBADMmTMHubm5uOeee7Bx40a43W6sXbsWTqeTezdEo0kkl9EA9Lls3rxZqzl//rw8/PDDkpaWJomJiXLnnXdKS0tL2Nc5ceKEzJ07VxISEmTs2LHy6KOPSiAQ6HcfvNRONDJF8t7UiYioi76B8fl8sFqt8Hq9sFgsqtshoq9F8t7kvV1EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKTEgO5qJxoKEgoh2NmJUHc3dHFxiEtIgM5kgk6nU90aDQOGDw07EUHg7Fmc2rUL3gMH0H36NPRmMxInTULGbbchJS8Purg41W1SlDF8aFiJCPzNzTjx3HPobGgAvr61MNjRAe+ZM+ior0fmvfdiXHExA+gqx/ChYSMi6Dh6FF++8AL8J09CRNDW3Y1/trfDajLhuykpwLlzOPnHP8I0bhysBQU8BLuKMXxo2AQ7O9H8n/+pBU9jZyfWHTqEBq8XSQYDfnnttbgrJwc4dw6eykqkTJ2KuIQE1W1TlPBqFw0LCYXg2b4dHceOXXgN4P/V1+NoWxuCIvAFAnjh2DEc/uorAMC5//kfBM+fV9gxRRvDh6JORNB++DBO7dypneMBAF8gEFbXHQrBHwwOd3ukCMOHoi7k98Pz1lsItrdr63QAfmi3w/CNczrXWiz4TnIyACDObIZOz3+eVzOe86GoEhGc/eADtH/6adh6nU6H0kmTkGI04q8tLRifkID7r70WGfHxAIC0W26BgU+pvKoxfChqRAT+lha0vPEGpKfnknGDXo+fTpyIRRMnonf/R6fTwWCxXLjUzj2fqxrDh6JGAgH87+bNCJw5c9kanU4HXfgKjJ0zB+bx46PeH6nF/1ooKnoPt3y1tRHNS5w8GbYFC7jXMwrwb5iiwt/SAvebb/Z5uHU5ccnJmLB0KeKSkqLYGY0UDB8acqFAAM2vvgr/1x8q2S96PcYWFyP5+uv5W82jBMOHhpSIwFdXh7YDByKal5iTA/uddwIMnlGD4UNDRkTQ89VXOLl1K6S7u9/zdEYjbAsWIC4lhXs9owjDh4ZOKAR3ZSW6mpoimjbmhz9E2s03M3hGGYYPDYneO9ZPV1WF3ULxbeKzs2H/6U/5+IxRiOFDQyLY0YGm//gPhM6d6/ccnckE+8KFMGVkRLEzGqkYPjRoEgqhZds2nD9+PKJ5aQ4H0r7/fR5ujVIMHxq0jsOHceavf43ocMs8fjyuKS2F3miMYmc0kjF8aFB6Ojtx8o9/RLCjo99zdCYTxv/85zCOGRPFzmikY/jQgEkohDN79qDziy8impc6YwavbhHDhwZGRHD+yy/h/q//iuhwy5SRgWvuvRc6A+9pHu0YPjQgoa4uNL/6Knq83n7P0RmNyPz5z2Gy2bjXQwwfipyI4Kt9++A7dCiieZYbbkDarFkMHgLA8KEB8Le0oPlPf4rojnVjWhrsixZBZzJFsTOKJRGFz6ZNm5CXlweLxQKLxQKHw4Fdu3Zp411dXXA6nRgzZgySk5OxcOFCeDyesK/R2NiIkpISJCYmIiMjA2vWrEFPBP+ISa1QIID/feUVBM6e7f8knQ7jSkqQdO213OshTUThM2HCBGzYsAG1tbX4+OOP8aMf/Qjz58/HkSNHAACPPPII3n33XWzbtg01NTVobm7GggULtPnBYBAlJSXo7u7Ghx9+iK1bt2LLli1Yv3790G4VRYWI4MyePREfbqVMnYqMefP4gDAKoxOJ4FJFH9LT0/HMM89g0aJFGDduHF577TUsWrQIAPDZZ5/h+uuvh8vlwsyZM7Fr1y785Cc/QXNzM2w2GwDg5Zdfxq9+9SucOnUKpsvskvv9fvj9fu21z+dDVlYWvF4vLHzI+LDxnzqFzx9/HN0X7c1eiSE1FZPWrkXStddGsTMaKXw+H6xWa7/emwP+rygYDKKiogKdnZ1wOByora1FIBBAUVGRVjNlyhRkZ2fD5XIBAFwuF6ZNm6YFDwAUFxfD5/Npe099KS8vh9Vq1ZasrKyBtk0DFOrpgaeyMqLggV6PjJISJE6aFL3GKGZFHD719fVITk6G2WzGgw8+iMrKSuTm5sLtdsNkMiE1NTWs3mazwf31E+3cbndY8PSO945dTllZGbxer7Y0RfjIBhocEYHv0CGc3r07onlJU6bwcIsuK+Lf9LruuutQV1cHr9eLN998E6WlpaipqYlGbxqz2Qyz2RzV70GXFzh79sIDwi76hNEriUtJQdYvfwk9P2udLiPi8DGZTJj09W50fn4+Dh48iOeeew533XUXuru70dbWFrb34/F4YLfbAQB2ux0HLnq8Zu/VsN4aGlmkpwee7dsje0CYXg/b7bcj8V/+hVe36LIGvT8cCoXg9/uRn58Po9GI6upqbayhoQGNjY1wOBwAAIfDgfr6erS2tmo1VVVVsFgsyM3NHWwrNMREBB3HjuH0e+9FdAtF0uTJGFdSwucx0xVFtOdTVlaGuXPnIjs7G+3t7XjttdfwwQcfYPfu3bBarVi2bBlWr16N9PR0WCwWrFixAg6HAzNnzgQAzJkzB7m5ubjnnnuwceNGuN1urF27Fk6nk4dVI1CwowNN//7vCHV19XuOPjER1/ziF4hLSuJeD11RROHT2tqKe++9Fy0tLbBarcjLy8Pu3bvx4x//GADw7LPPQq/XY+HChfD7/SguLsZLL72kzY+Li8OOHTvw0EMPweFwICkpCaWlpXjqqaeGdqto0CQUwqndu3G+sbH/k3Q62G6/HclTpjB46FsN+vd8VIjkdwkociKCzs8/xz+ffjqiG0cTcnIw+cknYbzoiieNHsPyez509QqdP4+WioqIgkcfH4/xP/sZDFZrFDujqwnDh8Jot1BE+BnrabNmIXXmTB5uUb8xfCjM+RMn0FJREdEc8zXXXHhAGD/+hiLA8CFN8Nw5NP/pT+jx+fo9Rx8fjwmlpTDwPA9FiOFDAC5c3fpq3z54Iznc0umQ9v3vw1JQwMMtihjDhwBceEDYyVdfBYLBfs8x2+3IXLyYh1s0IAwfQigQgPu//xs9bW39nqMzGnFNaSmMY8dyr4cGhOEzyokIvPv34+zevRHNS/vBD2Dl4RYNAsNnFBMRBE6fRssbb0C6u/s9z5SRgfE/+xl0/LRRGgSGzyjneecdnP/yy/5PiItD5r/9G8zjx3OvhwaF4TOKhfx+tNfXRzTHkpeHNIeDwUODxvAZxc6fOIHzJ070u96Yno4J990HPZ9AQEOA4TOKSTAIhEL9qtXFxSFj/nzEZ2dHuSsaLRg+1C/JubkYN3cuD7doyDB8RrH4a66BecKEb62LS07GNb/4BQ+3aEgxfEYxg9WK8YsWQR8ff9kaXVwcMubN4/OYacgxfEYxnU6HtB/8AOPvvhtxSUmXjptMGDt3Lmzz5/MWChpyEX96BV1d9EYjMm6/HYnf/S7OVFfj3D//CQkGEZ+djTG33AJrQQEPtygqGD4EvcGAlLw8pPzrvyIUDAIi0MXFQWcw8FCLoobhQwAuHILBYECcgf8kaHjwnA8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKDCp8NmzYAJ1Oh1WrVmnrurq64HQ6MWbMGCQnJ2PhwoXweDxh8xobG1FSUoLExERkZGRgzZo16OnpGUwrRBRjBhw+Bw8exO9//3vk5eWFrX/kkUfw7rvvYtu2baipqUFzczMWLFigjQeDQZSUlKC7uxsffvghtm7dii1btmD9+vUD3woiij0yAO3t7TJ58mSpqqqSW265RVauXCkiIm1tbWI0GmXbtm1a7bFjxwSAuFwuERHZuXOn6PV6cbvdWs2mTZvEYrGI3+/v8/t1dXWJ1+vVlqamJgEgXq93IO0TUZR4vd5+vzcHtOfjdDpRUlKCoqKisPW1tbUIBAJh66dMmYLs7Gy4XC4AgMvlwrRp02Cz2bSa4uJi+Hw+HDlypM/vV15eDqvVqi1ZWVkDaZuIRpCIw6eiogKffPIJysvLLxlzu90wmUxITU0NW2+z2eB2u7WabwZP73jvWF/Kysrg9Xq1pampKdK2iWiEiehp4U1NTVi5ciWqqqoQf4UPmhtqZrMZZn58C9FVJaI9n9raWrS2tuLGG2+EwWCAwWBATU0Nnn/+eRgMBthsNnR3d6OtrS1snsfjgd1uBwDY7fZLrn71vu6tIaKrX0ThM3v2bNTX16Ourk5bCgoKsGTJEu3PRqMR1dXV2pyGhgY0NjbC4XAAABwOB+rr69Ha2qrVVFVVwWKxIDc3d4g2i4hGuogOu1JSUjB16tSwdUlJSRgzZoy2ftmyZVi9ejXS09NhsViwYsUKOBwOzJw5EwAwZ84c5Obm4p577sHGjRvhdruxdu1aOJ1OHloRjSJD/glxzz77LPR6PRYuXAi/34/i4mK89NJL2nhcXBx27NiBhx56CA6HA0lJSSgtLcVTTz011K0Q0QimExFR3USkfD4frFYrvF4vLBaL6naI6GuRvDd5bxcRKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESBtUNDISIAAB8Pp/iTojom3rfk73v0SuJyfA5c+YMACArK0txJ0TUl/b2dlit1ivWxGT4pKenAwAaGxu/dQNHGp/Ph6ysLDQ1NcFisahup9/Y9/CK1b5FBO3t7cjMzPzW2pgMH73+wqkqq9UaU38x32SxWGKyd/Y9vGKx7/7uEPCEMxEpwfAhIiViMnzMZjOeeOIJmM1m1a1ELFZ7Z9/DK1b7joRO+nNNjIhoiMXkng8RxT6GDxEpwfAhIiUYPkSkBMOHiJSIyfB58cUXMXHiRMTHx6OwsBAHDhxQ2s/evXsxb948ZGZmQqfT4e233w4bFxGsX78e48ePR0JCAoqKivDFF1+E1Zw9exZLliyBxWJBamoqli1bho6Ojqj2XV5ejptuugkpKSnIyMjAHXfcgYaGhrCarq4uOJ1OjBkzBsnJyVi4cCE8Hk9YTWNjI0pKSpCYmIiMjAysWbMGPT09Uet706ZNyMvL03771+FwYNeuXSO6575s2LABOp0Oq1atirneh4TEmIqKCjGZTPLKK6/IkSNH5P7775fU1FTxeDzKetq5c6f8+te/lrfeeksASGVlZdj4hg0bxGq1yttvvy1///vf5fbbb5ecnBw5f/68VnPrrbfK9OnT5aOPPpK//e1vMmnSJFm8eHFU+y4uLpbNmzfL4cOHpa6uTm677TbJzs6Wjo4OrebBBx+UrKwsqa6ulo8//lhmzpwpN998szbe09MjU6dOlaKiIjl06JDs3LlTxo4dK2VlZVHr+5133pE///nP8vnnn0tDQ4M8/vjjYjQa5fDhwyO254sdOHBAJk6cKHl5ebJy5UptfSz0PlRiLnxmzJghTqdTex0MBiUzM1PKy8sVdvV/Lg6fUCgkdrtdnnnmGW1dW1ubmM1mef3110VE5OjRowJADh48qNXs2rVLdDqdnDx5cth6b21tFQBSU1Oj9Wk0GmXbtm1azbFjxwSAuFwuEbkQvHq9Xtxut1azadMmsVgs4vf7h633tLQ0+cMf/hATPbe3t8vkyZOlqqpKbrnlFi18YqH3oRRTh13d3d2ora1FUVGRtk6v16OoqAgul0thZ5d3/PhxuN3usJ6tVisKCwu1nl0uF1JTU1FQUKDVFBUVQa/XY//+/cPWq9frBfB/Tw2ora1FIBAI633KlCnIzs4O633atGmw2WxaTXFxMXw+H44cORL1noPBICoqKtDZ2QmHwxETPTudTpSUlIT1CMTGz3soxdRd7adPn0YwGAz7wQOAzWbDZ599pqirK3O73QDQZ8+9Y263GxkZGWHjBoMB6enpWk20hUIhrFq1CrNmzcLUqVO1vkwmE1JTU6/Ye1/b1jsWLfX19XA4HOjq6kJycjIqKyuRm5uLurq6EdszAFRUVOCTTz7BwYMHLxkbyT/vaIip8KHocTqdOHz4MPbt26e6lX657rrrUFdXB6/XizfffBOlpaWoqalR3dYVNTU1YeXKlaiqqkJ8fLzqdpSLqcOusWPHIi4u7pKz/x6PB3a7XVFXV9bb15V6ttvtaG1tDRvv6enB2bNnh2W7li9fjh07duD999/HhAkTtPV2ux3d3d1oa2u7Yu99bVvvWLSYTCZMmjQJ+fn5KC8vx/Tp0/Hcc8+N6J5ra2vR2tqKG2+8EQaDAQaDATU1NXj++edhMBhgs9lGbO/REFPhYzKZkJ+fj+rqam1dKBRCdXU1HA6Hws4uLycnB3a7Paxnn8+H/fv3az07HA60tbWhtrZWq9mzZw9CoRAKCwuj1puIYPny5aisrMSePXuQk5MTNp6fnw+j0RjWe0NDAxobG8N6r6+vDwvPqqoqWCwW5ObmRq33i4VCIfj9/hHd8+zZs1FfX4+6ujptKSgowJIlS7Q/j9Teo0L1Ge9IVVRUiNlsli1btsjRo0flgQcekNTU1LCz/8Otvb1dDh06JIcOHRIA8pvf/EYOHTokX375pYhcuNSempoq27dvl08//VTmz5/f56X2G264Qfbv3y/79u2TyZMnR/1S+0MPPSRWq1U++OADaWlp0ZZz585pNQ8++KBkZ2fLnj175OOPPxaHwyEOh0Mb7730O2fOHKmrq5P33ntPxo0bF9VLv4899pjU1NTI8ePH5dNPP5XHHntMdDqd/OUvfxmxPV/ON692xVrvgxVz4SMi8rvf/U6ys7PFZDLJjBkz5KOPPlLaz/vvvy8ALllKS0tF5MLl9nXr1onNZhOz2SyzZ8+WhoaGsK9x5swZWbx4sSQnJ4vFYpGlS5dKe3t7VPvuq2cAsnnzZq3m/Pnz8vDDD0taWpokJibKnXfeKS0tLWFf58SJEzJ37lxJSEiQsWPHyqOPPiqBQCBqfd93333yne98R0wmk4wbN05mz56tBc9I7flyLg6fWOp9sPg8HyJSIqbO+RDR1YPhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEiJ/w8/zS+w6a3h8QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import gym\n",
    "\n",
    "\n",
    "#定义环境\n",
    "class MyWrapper(gym.Wrapper):\n",
    "\n",
    "    def __init__(self):\n",
    "        env = gym.make('Pendulum-v1', render_mode='rgb_array')\n",
    "        super().__init__(env)\n",
    "        self.env = env\n",
    "        self.step_n = 0\n",
    "\n",
    "    def reset(self):\n",
    "        state, _ = self.env.reset()\n",
    "        self.step_n = 0\n",
    "        return state\n",
    "\n",
    "    def step(self, action):\n",
    "        state, reward, terminated, truncated, info = self.env.step(\n",
    "            [action * 2])\n",
    "        over = terminated or truncated\n",
    "\n",
    "        #偏移reward,便于训练\n",
    "        reward = (reward + 8) / 8\n",
    "\n",
    "        #限制最大步数\n",
    "        self.step_n += 1\n",
    "        if self.step_n >= 200:\n",
    "            over = True\n",
    "\n",
    "        return state, reward, over\n",
    "\n",
    "    #打印游戏图像\n",
    "    def show(self):\n",
    "        from matplotlib import pyplot as plt\n",
    "        plt.figure(figsize=(3, 3))\n",
    "        plt.imshow(self.env.render())\n",
    "        plt.show()\n",
    "\n",
    "\n",
    "env = MyWrapper()\n",
    "\n",
    "env.reset()\n",
    "\n",
    "env.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.1049],\n",
       "        [-0.1505]], grad_fn=<TanhBackward0>)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "\n",
    "class Model(torch.nn.Module):\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.s = torch.nn.Sequential(\n",
    "            torch.nn.Linear(3, 64),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(64, 64),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(64, 1),\n",
    "            torch.nn.Tanh(),\n",
    "        )\n",
    "\n",
    "    def forward(self, state):\n",
    "        return self.s(state)\n",
    "\n",
    "\n",
    "model_action = Model()\n",
    "model_action_delay = Model()\n",
    "model_action_delay.load_state_dict(model_action.state_dict())\n",
    "\n",
    "model_action(torch.randn(2, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([[ 0.0432],\n",
       "         [-0.0194]], grad_fn=<AddmmBackward0>),\n",
       " tensor([[-0.0244],\n",
       "         [-0.1560]], grad_fn=<AddmmBackward0>))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_value1 = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 1),\n",
    ")\n",
    "model_value1_delay = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 1),\n",
    ")\n",
    "model_value1_delay.load_state_dict(model_value1.state_dict())\n",
    "\n",
    "model_value2 = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 1),\n",
    ")\n",
    "model_value2_delay = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 1),\n",
    ")\n",
    "model_value2_delay.load_state_dict(model_value2.state_dict())\n",
    "\n",
    "model_value1(torch.randn(2, 4)), model_value2(torch.randn(2, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.0491358646021984"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython import display\n",
    "import random\n",
    "\n",
    "\n",
    "#玩一局游戏并记录数据\n",
    "def play(show=False):\n",
    "    data = []\n",
    "    reward_sum = 0\n",
    "\n",
    "    state = env.reset()\n",
    "    over = False\n",
    "    while not over:\n",
    "        action = model_action(torch.FloatTensor(state).reshape(1, 3)).item()\n",
    "\n",
    "        #给动作添加噪声,增加探索\n",
    "        action += random.normalvariate(mu=0, sigma=0.2)\n",
    "\n",
    "        next_state, reward, over = env.step(action)\n",
    "\n",
    "        data.append((state, action, reward, next_state, over))\n",
    "        reward_sum += reward\n",
    "\n",
    "        state = next_state\n",
    "\n",
    "        if show:\n",
    "            display.clear_output(wait=True)\n",
    "            env.show()\n",
    "\n",
    "    return data, reward_sum\n",
    "\n",
    "\n",
    "play()[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_7590/1738991660.py:27: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at  ../torch/csrc/utils/tensor_new.cpp:201.)\n",
      "  state = torch.FloatTensor([i[0] for i in data]).reshape(-1, 3)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(torch.Size([64, 3]),\n",
       " 200,\n",
       " (array([-0.77460074, -0.6324506 , -0.8910659 ], dtype=float32),\n",
       "  -0.027735333185845312,\n",
       "  0.23554229983129993,\n",
       "  array([-0.8161807, -0.5777967, -1.3737245], dtype=float32),\n",
       "  False))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#数据池\n",
    "class Pool:\n",
    "\n",
    "    def __init__(self):\n",
    "        self.pool = []\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.pool)\n",
    "\n",
    "    def __getitem__(self, i):\n",
    "        return self.pool[i]\n",
    "\n",
    "    #更新动作池\n",
    "    def update(self):\n",
    "        #每次更新不少于N条新数据\n",
    "        old_len = len(self.pool)\n",
    "        while len(pool) - old_len < 200:\n",
    "            self.pool.extend(play()[0])\n",
    "\n",
    "        #只保留最新的N条数据\n",
    "        self.pool = self.pool[-2_0000:]\n",
    "\n",
    "    #获取一批数据样本\n",
    "    def sample(self):\n",
    "        data = random.sample(self.pool, 64)\n",
    "\n",
    "        state = torch.FloatTensor([i[0] for i in data]).reshape(-1, 3)\n",
    "        action = torch.FloatTensor([i[1] for i in data]).reshape(-1, 1)\n",
    "        reward = torch.FloatTensor([i[2] for i in data]).reshape(-1, 1)\n",
    "        next_state = torch.FloatTensor([i[3] for i in data]).reshape(-1, 3)\n",
    "        over = torch.LongTensor([i[4] for i in data]).reshape(-1, 1)\n",
    "\n",
    "        return state, action, reward, next_state, over\n",
    "\n",
    "\n",
    "pool = Pool()\n",
    "pool.update()\n",
    "state, action, reward, next_state, over = pool.sample()\n",
    "\n",
    "next_state.shape, len(pool), pool[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer_action = torch.optim.Adam(model_action.parameters(), lr=5e-4)\n",
    "optimizer_value1 = torch.optim.Adam(model_value1.parameters(), lr=5e-3)\n",
    "optimizer_value2 = torch.optim.Adam(model_value2.parameters(), lr=5e-3)\n",
    "\n",
    "\n",
    "def soft_update(_from, _to):\n",
    "    for _from, _to in zip(_from.parameters(), _to.parameters()):\n",
    "        value = _to.data * 0.7 + _from.data * 0.3\n",
    "        _to.data.copy_(value)\n",
    "\n",
    "\n",
    "def requires_grad(model, value):\n",
    "    for param in model.parameters():\n",
    "        param.requires_grad_(value)\n",
    "\n",
    "\n",
    "requires_grad(model_action_delay, False)\n",
    "requires_grad(model_value1_delay, False)\n",
    "requires_grad(model_value2_delay, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.01376472320407629"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_action(state):\n",
    "    requires_grad(model_action, True)\n",
    "    requires_grad(model_value1, False)\n",
    "    requires_grad(model_value2, False)\n",
    "\n",
    "    #首先把动作计算出来\n",
    "    action = model_action(state)\n",
    "\n",
    "    #使用value网络评估动作的价值,价值是越高越好\n",
    "    input = torch.cat([state, action], dim=1)\n",
    "    value1 = model_value1(input)\n",
    "    value2 = model_value2(input)\n",
    "    loss = -torch.min(value1, value1).mean()\n",
    "\n",
    "    loss.backward()\n",
    "    optimizer_action.step()\n",
    "    optimizer_action.zero_grad()\n",
    "\n",
    "    return loss.item()\n",
    "\n",
    "\n",
    "train_action(state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.041669800877571106, 0.0335715152323246)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_value(state, action, reward, next_state, over):\n",
    "    requires_grad(model_action, False)\n",
    "    requires_grad(model_value1, True)\n",
    "    requires_grad(model_value2, True)\n",
    "\n",
    "    #计算value\n",
    "    input = torch.cat([state, action], dim=1)\n",
    "    value1 = model_value1(input)\n",
    "    value2 = model_value2(input)\n",
    "\n",
    "    #计算target\n",
    "    next_action = model_action_delay(next_state)\n",
    "    input = torch.cat([next_state, next_action], dim=1)\n",
    "    with torch.no_grad():\n",
    "        target1 = model_value1_delay(input)\n",
    "        target2 = model_value2_delay(input)\n",
    "    target = torch.min(target1, target2)\n",
    "    target = target * 0.99 * (1 - over) + reward\n",
    "\n",
    "    #计算td loss,更新参数\n",
    "    loss1 = torch.nn.functional.mse_loss(value1, target)\n",
    "    loss2 = torch.nn.functional.mse_loss(value2, target)\n",
    "\n",
    "    loss1.backward()\n",
    "    optimizer_value1.step()\n",
    "    optimizer_value1.zero_grad()\n",
    "\n",
    "    loss2.backward()\n",
    "    optimizer_value2.step()\n",
    "    optimizer_value2.zero_grad()\n",
    "\n",
    "    return loss1.item(), loss2.item()\n",
    "\n",
    "\n",
    "train_value(state, action, reward, next_state, over)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 400 10.356381916876254\n",
      "20 4400 59.398461293219114\n",
      "40 8400 93.9003990491715\n",
      "60 12400 179.42523248094273\n",
      "80 16400 178.18461007439137\n",
      "100 20000 181.7873467513263\n",
      "120 20000 184.02346289209098\n",
      "140 20000 181.3078818054768\n",
      "160 20000 182.84553465177405\n",
      "180 20000 172.36964748985565\n"
     ]
    }
   ],
   "source": [
    "#训练\n",
    "def train():\n",
    "    model_action.train()\n",
    "    model_value1.train()\n",
    "    model_value2.train()\n",
    "\n",
    "    #共更新N轮数据\n",
    "    for epoch in range(200):\n",
    "        pool.update()\n",
    "\n",
    "        #每次更新数据后,训练N次\n",
    "        for i in range(200):\n",
    "\n",
    "            #采样N条数据\n",
    "            state, action, reward, next_state, over = pool.sample()\n",
    "\n",
    "            #训练模型\n",
    "            train_action(state)\n",
    "            train_value(state, action, reward, next_state, over)\n",
    "\n",
    "        soft_update(model_action, model_action_delay)\n",
    "        soft_update(model_value1, model_value1_delay)\n",
    "        soft_update(model_value2, model_value2_delay)\n",
    "\n",
    "        if epoch % 20 == 0:\n",
    "            test_result = sum([play()[-1] for _ in range(20)]) / 20\n",
    "            print(epoch, len(pool), test_result)\n",
    "\n",
    "\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAkEUlEQVR4nO3df1RUdf4/8OcM8wMQZhCFGVBQSkpJUUOF2batXVmpqM0VO+V6lFq/VgSuLruecrPaOu1Sum2bW2Zn92SdTmbZyXbXtEIs/NGkhKIgSmQmpgyYyAyKDDDz+v5RzKcpwxlguDP5fJwz5zj3vubO61rz9P58X5WICIiIBpla6QaI6NLE8CEiRTB8iEgRDB8iUgTDh4gUwfAhIkUwfIhIEQwfIlIEw4eIFMHwISJFKBY+zz33HEaPHo3w8HBkZmZiz549SrVCRApQJHxef/11FBcX45FHHsHevXsxceJE5OTkoLm5WYl2iEgBKiVuLM3MzMTUqVPx7LPPAgDcbjeSkpKwaNEiPPDAAxf9vNvtxsmTJxEdHQ2VShXodonIRyKCtrY2JCYmQq3ufdtGM0g9eXR2dqKyshLLli3zTFOr1cjOzobVar3gZ5xOJ5xOp+f9iRMnkJaWFvBeiahvjh8/jpEjR/ZaM+jh89VXX8HlcsFkMnlNN5lMOHz48AU/U1JSgkcfffR7048fPw6DwRCQPonIfw6HA0lJSYiOjr5o7aCHT18sW7YMxcXFnvc9K2gwGBg+REHIl8Mhgx4+w4cPR1hYGJqamrymNzU1wWw2X/Azer0eer1+MNojokEy6Ge7dDodMjIyUFZW5pnmdrtRVlYGi8Uy2O0QkUIU2e0qLi5Gfn4+pkyZgmnTpuEf//gHzp07h7vuukuJdohIAYqEz+23345Tp07h4Ycfhs1mw6RJk/Duu+9+7yA0Ef14KXKdT385HA4YjUbY7XYecCYKIv78NnlvFxEpguFDRIpg+BCRIhg+RKQIhg8RKYLhQ0SKYPgQkSIYPkSkCIYPESmC4UNEimD4EJEiGD5EpAiGDxEpguFDRIpg+BCRIhg+RKQIhg8RKYLhQ0SKYPgQkSIYPkSkCIYPESmC4UNEimD4EJEiGD5EpAiGDxEpguFDRIpg+BCRIhg+RKQIhg8RKYLhQ0SKYPgQkSIYPkSkCIYPESmC4UNEimD4EJEi/A6f7du345ZbbkFiYiJUKhXefvttr/kigocffhgJCQmIiIhAdnY26uvrvWpaWlowd+5cGAwGxMTEYMGCBTh79my/VoSIQovf4XPu3DlMnDgRzz333AXnr1ixAqtWrcKaNWuwe/duDBkyBDk5Oejo6PDUzJ07FwcPHkRpaSk2bdqE7du34+677+77WhBR6JF+ACAbN270vHe73WI2m2XlypWeaa2traLX6+W1114TEZHa2loBIBUVFZ6aLVu2iEqlkhMnTvj0vXa7XQCI3W7vT/tENMD8+W0O6DGfo0ePwmazITs72zPNaDQiMzMTVqsVAGC1WhETE4MpU6Z4arKzs6FWq7F79+4LLtfpdMLhcHi9iCi0DWj42Gw2AIDJZPKabjKZPPNsNhvi4+O95ms0GsTGxnpqvqukpARGo9HzSkpKGsi2iUgBIXG2a9myZbDb7Z7X8ePHlW6JiPppQMPHbDYDAJqamrymNzU1eeaZzWY0Nzd7ze/u7kZLS4un5rv0ej0MBoPXi4hC24CGT0pKCsxmM8rKyjzTHA4Hdu/eDYvFAgCwWCxobW1FZWWlp2bbtm1wu93IzMwcyHaIKIhp/P3A2bNn8dlnn3neHz16FFVVVYiNjUVycjKWLFmCxx9/HKmpqUhJScFDDz2ExMREzJw5EwAwbtw43HDDDVi4cCHWrFmDrq4uFBUV4Y477kBiYuKArRgRBTl/T6V98MEHAuB7r/z8fBH5+nT7Qw89JCaTSfR6vUyfPl3q6uq8lnH69GmZM2eOREVFicFgkLvuukva2tp87oGn2omCkz+/TZWIiILZ1ycOhwNGoxF2u53Hf4iCiD+/zZA420VEPz4MHyJSBMOHiBTB8CEiRTB8iEgRDB8iUoTfFxkSBQMRgautDWcPHULHiRNQhYUhIiUFQ1JToQ4Ph0qlUrpFugiGD4UccbvRtn8/TrzyCs4fOwbp6gJUKqj1ekRPmIAR+fkIT0piAAU5hg+FFBFBW3U1vnjmGXS1tHx7BtwdHbBXVKCzpQWXLV0KfUICAyiI8ZgPhRTXuXP48sUXvYPnO84fOYKTr74KuN2D2Bn5i+FDIcX+ySc439Bw8bqKCp/qSDkMHwoZIoLOpibA5bporbujA+7OzkHoivqK4UMhQ7q6cGbXLqXboAHC8KGQca6+Hs4fGOf7u1QaDVRhYQHuiPqD4UMhoecsl/tbz3/rTeRllyEiOTnAXVF/MHwoJEh3N1o//tjneuPUqVBptQHsiPqL4UMhof2zz9Dh41NL1Ho9oidO5DU+QY7hQyHBXln59ZXMPggfORKRl18e4I6ovxg+FPRcHR1wVFX5XD/0mmug0vDi/WDH8KGg19HQgPNHjvhUq46MRNT48dzlCgEMHwp6Zz76COLDhYUAoDeZEHnZZQHuiAYCw4eCmuvcOZw9eNDn+qHXXMOzXCGC4UNBrcNmQ7sfu1yGyZO5yxUiGD4UtEQEZ3bsgHR3+1QfnpCA8JEjA9wVDRSGDwUtV3s72g4c8LneOGUK1OHhAeyIBhLDh4KW8+RJvy4sjLFYuMsVQhg+FJREBK27d8PtdPpUHzF6NPRmc4C7ooHE8KGg5O7ogH3PHp/roydMgDoiIoAd0UBj+FBQOn/sGJxNTT7VqjQaDP3pT7nLFWIYPhR0RARtBw7Aff68T/WRqanc5QpBDB8KOtLVhTM7d/pcHz1+PMIiIwPYEQUCw4eCjr8jFsZkZQW4IwoEhg8FFRHB2Zoa/0YsHDUqwF1RIDB8KKhIdzfOWK0+1xumTOG9XCGK4UNBpb2+3q8LCw2TJvEsV4jyK3xKSkowdepUREdHIz4+HjNnzkRdXZ1XTUdHBwoLCzFs2DBERUUhLy8PTd85ZdrQ0IDc3FxERkYiPj4eS5cuRbeP9+/Qj5eIwL53L0csvET4NdxbeXk5CgsLMXXqVHR3d+NPf/oTZsyYgdraWgwZMgQA8Pvf/x7vvPMONmzYAKPRiKKiIsyaNQu7vnneksvlQm5uLsxmMz766CM0NjZi/vz50Gq1+Otf/zrwa0iKExF0dXWhvb0dJ0+exLFjx9Dc3AyHw4Hhw4dj1qxZ0Ov1cDudcOzb5/NyYzhiYUhTiYj09cOnTp1CfHw8ysvL8bOf/Qx2ux1xcXFYt24dZs+eDQA4fPgwxo0bB6vViqysLGzZsgU333wzTp48CZPJBABYs2YN7r//fpw6dQo6ne6i3+twOGA0GmG322EwGPraPgVYd3c3Tp48ie3bt6OsrAwVFRVobGyEy+WCWq2GSqXCtGnT8MYbbyA6OhrnPv0Udfff79PAYerISKT++c+IGjt2ENaEfOXPb7Nf/2zY7XYAQGxsLACgsrISXV1dyM7O9tSMHTsWycnJnvCxWq2YMGGCJ3gAICcnBwUFBTh48CAmT578ve9xOp1wfuseH4fD0Z+2KcA6Oztx4MABvPLKK9i0aRNOnTqFuLg4jB8/HrfddhtSU1NhMpkQHR2NmJgYRH5zjQ5HLLy09Dl83G43lixZgmuuuQbjx48HANhsNuh0OsTExHjVmkwm2L65bsNms3kFT8/8nnkXUlJSgkcffbSvrdIgEBGICD799FOsWrUKb775JlQqFX7xi19g9uzZyMzMhMlkQlhYmOcA8bcPFPs9YuFPf8qzXCGuz+FTWFiImpoa7PTjStS+WrZsGYqLiz3vHQ4HkpKSAv695BsRQUdHB9atW4cnnngCLS0tyMvLwz333IP09HRoNJqLnpHqaGz0b8RCnuUKeX0Kn6KiImzatAnbt2/HyG+NHGc2m9HZ2YnW1lavrZ+mpiaYv7n3xmw2Y8937lbuORtm/oH7c/R6PfR6fV9apQATETQ3N+PPf/4zXn31VUyePBlr1qzBtdde69Pxu55lnNm50/cRC81mjlj4I+DXqXYRQVFRETZu3Iht27YhJSXFa35GRga0Wi3Kyso80+rq6tDQ0ACLxQIAsFgsqK6uRnNzs6emtLQUBoMBaWlp/VkXGmQigs8//xx33XUX1q9fj3vuuQcbNmzAL37xC5+DB+jDiIVTp3LEwh8Bv7Z8CgsLsW7dOvznP/9BdHS05xiN0WhEREQEjEYjFixYgOLiYsTGxsJgMGDRokWwWCzI+ub+mxkzZiAtLQ3z5s3DihUrYLPZsHz5chQWFnLrJoSICI4cOYKFCxfi0KFDWLFiBebNm4fwPoQCRyy8NPkVPs8//zwA4Prrr/eavnbtWtx5550AgKeffhpqtRp5eXlwOp3IycnB6tWrPbVhYWHYtGkTCgoKYLFYMGTIEOTn5+Oxxx7r35rQoBERNDY2orCwEHV1dVi1ahXy8vIQFhbWp2W1fvwxRyy8BPXrOh+l8Dof5YgI2tvb8bvf/Q5vv/02nn76acydO9dz3Y6/XOfPo+7++3H+iy98qjfPno3EefO45ROk/Plt8t4u8ouIYO3atdiwYQN+97vfYc6cOX0OHoAjFl7KeG06+UxEcODAAaxcuRLXX389Fi9eDG0/rrUREbTt388RCy9R3PIhnzmdTqxYsQJutxuPPPIIjEZjv5YnXV048809f77giIU/Lgwf8omIoLy8HJs3b8bChQsxceLEfu/+tH/2GUcsvIQxfMgn58+fxwsvvICEhATceeed0PTzbnIRQVt1te8jFl5+OSKSk/v1nRRcGD50USKC6upqfPjhh/jNb37jdVV7n5fp74iFGRlQ+XHhIgU/hg9dlNvtxltvvYXw8HDk5eVBre7//zbt9fXo+PJLn2pVOh3v5foRYvjQRbW2tmLr1q3IzMxEampqv5cnInDs2wfp7PSpPiIpye8RC0UEbre7L+3RIGH40EXV19fjyJEjuPHGG/t9rAcA3E4n7Hv3+lwf85Of+D1i4alTp7Bz506E4DW0lwyGD/VKRLBnzx7o9XpMmTJlQJbZcewYzn/+uU+16ogIRI8f79cul4hgx44dePbZZ9Hh4wFtGnwMH+pVd3c3Dhw4gLi4OKSkpAzIcZczVqvvIxaazX7vcnV1dWHDhg3YsWMHjh071pcWaRAwfKhXnZ2dOHLkCFJSUhAdHd3v5bnOncPZmhqf6/syYuHnn3+O7du346uvvkJZWRl3vYIUw4d61dXVhZMnTyIlJWVAznJ9e8RCEcEZpxOffPUV6h0OuL8TEurISBj8vJhRRLB582Y0NzfD7XbjnXfeQXt7e7/7poHHe7uoVy6XCw6H43vjbvfVmV27IC4XRAQN587hoX37UGe3Y4hGg/93xRW4PSUFYd+ETXhCAsL9HC7X4XBg48aNnq2diooKHD16FFdddRVP1QcZbvnQRYkIoqOjB+TH6zp37utlAniyuhq1ra1wicDR1YVnDx1CzZkznlp/RywUEezfvx9VVVWeaXa7He+//z53vYIQw4d80peBwi7G8Z0nk3a63XB+cyBardcjJivLr8BzuVx46623kJCQgMmTJ8NoNOLyyy/Hli1bcN7HO+dp8DB8yCcDdco6LCICAKAC8HOzGZpvhcsVBgNGRUV9XRcZCe03z4Pz1enTp9HR0YH169dj6tSpSEhIwNq1a3HZZZfhcx9P7dPg4TEf6pVKpYJGo0FLSwtEpN+7XsN+/nN8VVoK19mzyB8zBtFaLbY2NiIhIgILr7gC8d/sZg297jpo/BylUqfT4S9/+QuGDh0Kh8OBqKgojB07Fk8++SS3fIIQw4d6pdFoEBcXhy+//HJAjpuEJycjYc4cnHj5ZWg6O3Hb6NGYPXo0eiJNpVIhKi0NppkzofLz7FrP45qcTidsNhtiY2MREREBnU7X77GHaOBxt4t6pdPpMGrUKBw5cgTdPj5XqzcqtRpxN9yAEfn50MbGQqVWQ61SQaVSQa3TwThtGkYtXgydn7tcwNfBpVKpYLfb0djYiOTkZGi1Ws90Ci7c8qFe6XQ6jB07Fq+//jqampqQPABj6qi1WsTn5sJ49dVw7NsHp80GdUQEosaNQ9S4cZ7jQn3V2NiIxsZGpKenD8i1SRQYDB/qlUqlQkZGBl544QUcOnQISUlJA7IVoVKrET5iBMJHjBiALv+PiGDv3r1wuVyYPHnygC6bBhb/WaBe9YRPZGQkysvLlW7norq7u7Ft2zaMGjVqQIb/oMBh+NBFJSQkIDMzE6WlpbDb7Uq306vGxkbs2rUL1157LYYNG6Z0O9QLhg9dVHh4OG699VbU1dXBarUG7dXCIoL3338fZ86cwaxZs3iQOcgxfOiiVCoVsrOzMWLECLz44otBO0aO3W7HK6+8gvT0dGRmZjJ8ghzDh3ySkJCAO+64A1u3bg3KrZ+eu9n379+PO++8E1HfXClNwYvhQz5Rq9WYP38+4uPj8dRTT8HhcCjdkoeIoKmpCc888wzS0tJw6623cqsnBDB8yGejRo1CUVERduzYgVdffRUuH0cjDLTu7m6sXr0a9fX1KC4uRmwfLlCkwcfwIZ+pVCrMnz8fP/vZz/Dkk09i7969iu9+iQi2bt2KNWvWYObMmcjNzeVWT4hg+JDPVCoVDAYDHn/8cWi1WhQXFw/YPV99ISI4dOgQli5dihEjRmD58uUI92P8H1IWw4f8olKpkJ6ejpKSEhw+fBhLlizBqVOnBj2ARATHjh1DYWEhWltb8dRTTw3YAPc0OBg+5De1Wo2ZM2di+fLl2LZtG4qKitDY2DhoASQiqK+vx8KFC1FbW4unnnoK1113HYMnxDB8qE+0Wi3uuecePPjgg9i6dSvy8/NRU1MT8KeEulwuWK1WzJs3DzU1NXjqqaeQl5cXkJEWKbAYPtRn4eHhWLRoEf72t7+htrYWt912G9544w20t7cP+FaQiKCtrQ3/+te/MGfOHLS0tOCFF17AnDlzGDwhiuFD/aLX65Gfn49XX30VMTExuOeee1BQUID9+/cPyPg/wNfPDtu1axfmzZuHpUuX4sorr8Qbb7yBm2++GWFhYdzdClXih9WrV8uECRMkOjpaoqOjJSsrSzZv3uyZf/78ebnvvvskNjZWhgwZIrNmzRKbzea1jGPHjslNN90kEREREhcXJ3/84x+lq6vLnzbEbrcLALHb7X59jgLH7XbLl19+KUuXLpX4+HhJTEyURYsWye7du6W9vV3cbre43W6fl+V2u8XhcEhZWZnMmzdPYmNjZeTIkfL444/LqVOnfF4WDS5/fpsqEd+3j//3v/8hLCwMqampEBG8/PLLWLlyJfbt24errroKBQUFeOedd/DSSy/BaDSiqKgIarUau3btAvD1/vqkSZNgNpuxcuVKNDY2Yv78+Vi4cCH++te/+hyYDocDRqMRdrsdBj/H+aXAERG4XC5UVlZi9erV2LJlC9xuNyZNmoQbbrgBWVlZuPzyyzF06FBoNBqvLRYRQVdXF06fPo36+nrs2rUL7733HqqrqxEZGYmZM2eioKAAaWlpUKvV3NoJUv78Nv0KnwuJjY3FypUrMXv2bMTFxWHdunWYPXs2AODw4cMYN24crFYrsrKysGXLFtx88804efKk5yF0a9aswf33349Tp05Bp9Nd8DucTiecTqfXCiYlJTF8gpjT6URtbS3efPNNvPvuu/jss88QFhaGoUOHIiEhASNGjMDQoUOh1WrhdDpx+vRpnDhxAjabzTNsx7hx45Cbm4uZM2fiiiuugNbPxybT4PMnfPo8kqHL5cKGDRtw7tw5WCwWVFZWoqurC9nZ2Z6asWPHIjk52RM+VqsVEyZM8Hr6ZU5ODgoKCnDw4MEfHHmupKQEjz76aF9bJQXo9XpMmjQJEydOxB/+8AfU1dVhz549qKmpwdGjR1FbW4u2tjZ0d3dDo9HAYDB4nreVnp6OadOmYcyYMYiOjuZQqD9SfodPdXU1LBYLOjo6EBUVhY0bNyItLQ1VVVXQ6XSeJwj0MJlMsNlsAACbzfa9x+72vO+puZBly5ahuLjY875ny4eCW8/A7bGxsbBYLMjKykJ3dze6urrQ3d0Nt9vteRyPWq2GRqOBTqfjQeRLhN/hc+WVV6Kqqgp2ux1vvvkm8vPzAz68pl6vh16vD+h3UOCpVCpotVruPhGAPoSPTqfDmDFjAAAZGRmoqKjAM888g9tvvx2dnZ1obW312vppamqC2WwGAJjNZuzZs8dreU1NTZ55RHTp6PfOtNvthtPpREZGBrRaLcrKyjzz6urq0NDQAIvFAgCwWCyorq5Gc3Ozp6a0tBQGgwFpaWn9bYWIQohfWz7Lli3DjTfeiOTkZLS1tWHdunX48MMP8d5778FoNGLBggWe8VQMBgMWLVrk2dcHgBkzZiAtLQ3z5s3DihUrYLPZsHz5chQWFnK3iugS41f4NDc3Y/78+WhsbITRaER6ejree+89/PKXvwQAPP3001Cr1cjLy4PT6UROTg5Wr17t+XxYWBg2bdqEgoICWCwWDBkyBPn5+XjssccGdq2IKOj1+zofJfAiQ6Lg5M9vkxdQEJEiGD5EpAiGDxEpguFDRIpg+BCRIhg+RKQIhg8RKYLhQ0SKYPgQkSIYPkSkCIYPESmC4UNEimD4EJEiGD5EpAiGDxEpguFDRIpg+BCRIhg+RKQIhg8RKYLhQ0SKYPgQkSIYPkSkCIYPESmC4UNEimD4EJEiGD5EpAiGDxEpguFDRIpg+BCRIhg+RKQIhg8RKYLhQ0SKYPgQkSIYPkSkCIYPESmiX+HzxBNPQKVSYcmSJZ5pHR0dKCwsxLBhwxAVFYW8vDw0NTV5fa6hoQG5ubmIjIxEfHw8li5diu7u7v60QkQhps/hU1FRgRdeeAHp6ele03//+9/jf//7HzZs2IDy8nKcPHkSs2bN8sx3uVzIzc1FZ2cnPvroI7z88st46aWX8PDDD/d9LYgo9EgftLW1SWpqqpSWlsp1110nixcvFhGR1tZW0Wq1smHDBk/toUOHBIBYrVYREdm8ebOo1Wqx2Wyemueff14MBoM4nc4Lfl9HR4fY7XbP6/jx4wJA7HZ7X9onogCx2+0+/zb7tOVTWFiI3NxcZGdne02vrKxEV1eX1/SxY8ciOTkZVqsVAGC1WjFhwgSYTCZPTU5ODhwOBw4ePHjB7yspKYHRaPS8kpKS+tI2EQURv8Nn/fr12Lt3L0pKSr43z2azQafTISYmxmu6yWSCzWbz1Hw7eHrm98y7kGXLlsFut3tex48f97dtIgoyGn+Kjx8/jsWLF6O0tBTh4eGB6ul79Ho99Hr9oH0fEQWeX1s+lZWVaG5uxtVXXw2NRgONRoPy8nKsWrUKGo0GJpMJnZ2daG1t9fpcU1MTzGYzAMBsNn/v7FfP+54aIvrx8yt8pk+fjurqalRVVXleU6ZMwdy5cz1/1mq1KCsr83ymrq4ODQ0NsFgsAACLxYLq6mo0Nzd7akpLS2EwGJCWljZAq0VEwc6v3a7o6GiMHz/ea9qQIUMwbNgwz/QFCxaguLgYsbGxMBgMWLRoESwWC7KysgAAM2bMQFpaGubNm4cVK1bAZrNh+fLlKCws5K4V0SXEr/DxxdNPPw21Wo28vDw4nU7k5ORg9erVnvlhYWHYtGkTCgoKYLFYMGTIEOTn5+Oxxx4b6FaIKIipRESUbsJfDocDRqMRdrsdBoNB6XaI6Bv+/DZ5bxcRKYLhQ0SKYPgQkSIYPkSkCIYPESmC4UNEimD4EJEiGD5EpAiGDxEpguFDRIpg+BCRIhg+RKQIhg8RKYLhQ0SKYPgQkSIYPkSkCIYPESmC4UNEimD4EJEiGD5EpAiGDxEpguFDRIpg+BCRIhg+RKQIhg8RKYLhQ0SKYPgQkSIYPkSkCIYPESmC4UNEimD4EJEiGD5EpAiGDxEpguFDRIpg+BCRIhg+RKQIhg8RKUKjdAN9ISIAAIfDoXAnRPRtPb/Jnt9ob0IyfE6fPg0ASEpKUrgTIrqQtrY2GI3GXmtCMnxiY2MBAA0NDRddwWDjcDiQlJSE48ePw2AwKN2Oz9j34ArVvkUEbW1tSExMvGhtSIaPWv31oSqj0RhS/2G+zWAwhGTv7HtwhWLfvm4Q8IAzESmC4UNEigjJ8NHr9XjkkUeg1+uVbsVvodo7+x5codq3P1TiyzkxIqIBFpJbPkQU+hg+RKQIhg8RKYLhQ0SKYPgQkSJCMnyee+45jB49GuHh4cjMzMSePXsU7Wf79u245ZZbkJiYCJVKhbfffttrvojg4YcfRkJCAiIiIpCdnY36+nqvmpaWFsydOxcGgwExMTFYsGABzp49G9C+S0pKMHXqVERHRyM+Ph4zZ85EXV2dV01HRwcKCwsxbNgwREVFIS8vD01NTV41DQ0NyM3NRWRkJOLj47F06VJ0d3cHrO/nn38e6enpnqt/LRYLtmzZEtQ9X8gTTzwBlUqFJUuWhFzvA0JCzPr160Wn08mLL74oBw8elIULF0pMTIw0NTUp1tPmzZvlwQcflLfeeksAyMaNG73mP/HEE2I0GuXtt9+W/fv3y69+9StJSUmR8+fPe2puuOEGmThxonz88ceyY8cOGTNmjMyZMyegfefk5MjatWulpqZGqqqq5KabbpLk5GQ5e/asp+bee++VpKQkKSsrk08++USysrLkJz/5iWd+d3e3jB8/XrKzs2Xfvn2yefNmGT58uCxbtixgff/3v/+Vd955Rz799FOpq6uTP/3pT6LVaqWmpiZoe/6uPXv2yOjRoyU9PV0WL17smR4KvQ+UkAufadOmSWFhoee9y+WSxMREKSkpUbCr//Pd8HG73WI2m2XlypWeaa2traLX6+W1114TEZHa2loBIBUVFZ6aLVu2iEqlkhMnTgxa783NzQJAysvLPX1qtVrZsGGDp+bQoUMCQKxWq4h8HbxqtVpsNpun5vnnnxeDwSBOp3PQeh86dKj8+9//Dome29raJDU1VUpLS+W6667zhE8o9D6QQmq3q7OzE5WVlcjOzvZMU6vVyM7OhtVqVbCzH3b06FHYbDavno1GIzIzMz09W61WxMTEYMqUKZ6a7OxsqNVq7N69e9B6tdvtAP5v1IDKykp0dXV59T527FgkJyd79T5hwgSYTCZPTU5ODhwOBw4ePBjwnl0uF9avX49z587BYrGERM+FhYXIzc316hEIjb/vgRRSd7V/9dVXcLlcXn/xAGAymXD48GGFuuqdzWYDgAv23DPPZrMhPj7ea75Go0FsbKynJtDcbjeWLFmCa665BuPHj/f0pdPpEBMT02vvF1q3nnmBUl1dDYvFgo6ODkRFRWHjxo1IS0tDVVVV0PYMAOvXr8fevXtRUVHxvXnB/PcdCCEVPhQ4hYWFqKmpwc6dO5VuxSdXXnklqqqqYLfb8eabbyI/Px/l5eVKt9Wr48ePY/HixSgtLUV4eLjS7SgupHa7hg8fjrCwsO8d/W9qaoLZbFaoq9719NVbz2azGc3NzV7zu7u70dLSMijrVVRUhE2bNuGDDz7AyJEjPdPNZjM6OzvR2traa+8XWreeeYGi0+kwZswYZGRkoKSkBBMnTsQzzzwT1D1XVlaiubkZV199NTQaDTQaDcrLy7Fq1SpoNBqYTKag7T0QQip8dDodMjIyUFZW5pnmdrtRVlYGi8WiYGc/LCUlBWaz2atnh8OB3bt3e3q2WCxobW1FZWWlp2bbtm1wu93IzMwMWG8igqKiImzcuBHbtm1DSkqK1/yMjAxotVqv3uvq6tDQ0ODVe3V1tVd4lpaWwmAwIC0tLWC9f5fb7YbT6QzqnqdPn47q6mpUVVV5XlOmTMHcuXM9fw7W3gNC6SPe/lq/fr3o9Xp56aWXpLa2Vu6++26JiYnxOvo/2Nra2mTfvn2yb98+ASB///vfZd++fXLs2DER+fpUe0xMjPznP/+RAwcOyK233nrBU+2TJ0+W3bt3y86dOyU1NTXgp9oLCgrEaDTKhx9+KI2NjZ5Xe3u7p+bee++V5ORk2bZtm3zyySdisVjEYrF45vec+p0xY4ZUVVXJu+++K3FxcQE99fvAAw9IeXm5HD16VA4cOCAPPPCAqFQqef/994O25x/y7bNdodZ7f4Vc+IiI/POf/5Tk5GTR6XQybdo0+fjjjxXt54MPPhAA33vl5+eLyNen2x966CExmUyi1+tl+vTpUldX57WM06dPy5w5cyQqKkoMBoPcdddd0tbWFtC+L9QzAFm7dq2n5vz583LffffJ0KFDJTIyUn79619LY2Oj13K++OILufHGGyUiIkKGDx8uf/jDH6Srqytgff/2t7+VUaNGiU6nk7i4OJk+fboneIK15x/y3fAJpd77i+P5EJEiQuqYDxH9eDB8iEgRDB8iUgTDh4gUwfAhIkUwfIhIEQwfIlIEw4eIFMHwISJFMHyISBEMHyJSxP8H27vGckgb5/YAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "197.61543598839395"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "play(True)[-1]"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第7章-DQN算法.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python [conda env:pt39]",
   "language": "python",
   "name": "conda-env-pt39-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
